Modifying original code

Original file: Fig3_4_6_ExtFig5-6-10_RPM_RPMA_Allos_CellTag.R

This notebook is modified from the original code to remove the preprocessing steps and show figures inline within the notebook.

There is a separate notebook for the CellTag analysis.

Load the Seurat object and CellTag clones

TBO_seurat<-readRDS("../data/07_2025_RPM_RPMA_TBO_CellTag_Seurat_wSigs_FA_dpt_final.rds")
TBO_seurat
An object of class Seurat 
110982 features across 26618 samples within 2 assays 
Active assay: RNA (55491 features, 0 variable features)
 2 layers present: counts, data
 1 other assay present: norm
 2 dimensional reductions calculated: umap, fa
clones <- readRDS("../data/05_2025_RPM_RPMA_TBOAllo_CellTagClones_Onlyclones.rds")
clones
An object of class Seurat 
110982 features across 5965 samples within 2 assays 
Active assay: RNA (55491 features, 0 variable features)
 2 layers present: counts, data
 1 other assay present: norm
 2 dimensional reductions calculated: umap, fa

Load organoid data as SingleCellExperiment

sce <- as.SingleCellExperiment(TBO_seurat, assay = "norm")
Warning :The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.
ℹ The deprecated feature was likely used in the Seurat package.
  Please report the issue at <]8;;https://github.com/satijalab/seurat/issueshttps://github.com/satijalab/seurat/issues]8;;>.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_]8;;ide:run:warnings()warnings()]8;;` to see where this warning was generated.
phenotypes <- as.character(unique(sce@colData@listData$Pheno))
pheno_pairs_list <- combn(phenotypes, 2, simplify = FALSE)

Define color palettes

my_colors <- c(
  "#E41A1C", # strong red
  "#377EB8", # medium blue
  "#4DAF4A", # green
  "#984EA3", # purple
  "#FF7F00", # orange
  "#FFFF33", # yellow
  "#A65628", # brown
  "#e7298a", # pink
  "#666666", # grey
  "#66C2A5", # teal
  "#FC8D62", # salmon
  "#8DA0CB", # soft blue
  "#E78AC3", # soft pink (different from 8)
  "#A6D854", # light green (but yellowish tint, not green)
  "#FFD92F", # lemon yellow
  "#E5C494", # light brown
  "#B3B3B3", # light grey
  "#1B9E77", # deep teal
  "#D95F02", # dark orange
  "#7570B3", # strong purple
  "#66A61E"  # olive green (NOT same green as before)
)

Define function to plot violin plots by phenotype

Copied from 02_WT_RPM_Organoids_Allografts_notebook.Rmd

Needs SingleCellExperiment sce object and pheno_pairs_list defined in the notebook.

plotVlnByPhen <- function(feature, yl=c(-0.1,0.3), focus=NA) {
    if(!is.na(focus)) {
        pheno_pairs_list <- pheno_pairs_list[sapply(pheno_pairs_list, function(x) focus %in% x)]
    }
    
    scater::plotColData(sce, x = "Pheno", y = feature, colour_by = "Pheno") + 
        scale_discrete_manual(aesthetics = c("colour", "fill"), values=pheno_col) +
        theme(axis.title.y = element_text(size = 24), axis.text.y = element_text(size = 16),
              axis.text.x = element_blank(),   # rotate x-axis labels
              plot.title = element_blank(),    # remove plot title
              legend.position = "none"         # remove legend
        ) + labs(x = "",                     # custom x-axis title
                 y = "Signature score"        # custom y-axis title
        ) + ylim(yl[1], yl[2]) + 
        geom_boxplot(fill=pheno_col, alpha=1/5, position = position_dodge(width = .2),
                     size=0.2, color="black", notch=TRUE, notchwidth=0.3, outlier.shape = 2, outlier.colour=NA) # + 
        # stat_summary(fun = mean,
        #              geom = "point",
        #              shape = 18,
        #              size = 2,
        #              color = "red",
        #              position = position_dodge(width = 0.5)) + 
        # ggpubr::stat_compare_means(method = "wilcox.test", label = "p.signif", size=4, 
        #                            vjust = 0.5,
        #                            hide.ns = TRUE, comparisons = pheno_pairs_list)
}

Fig. 2e UMAP

DimPlot(TBO_seurat, group.by='Genotype', cols=c("darkorchid4","orange"),
        pt.size = 0.1,
        reduction='umap', label=FALSE, label.size=7, shuffle=TRUE) & NoAxes()

Fig. 2f (UMAP by Leiden cluster)

DimPlot(TBO_seurat, group.by='leiden_scVI_1.2', 
        cols=my_colors, reduction='umap', label=TRUE, label.size=5,
        pt.size = 0.1) & NoAxes() +
    theme(legend.position="none")

Idents(TBO_seurat) <- 'leiden_scVI_1.2'

x <- table(TBO_seurat@meta.data$Genotype,Idents(TBO_seurat))
proportions <- as.data.frame(100*prop.table(x, margin = 1))

colnames(proportions) <- c("Sample", "Cluster", "Frequency")

# ggpubr::ggbarplot(proportions, x="Sample", y="Frequency", fill = "Sample", group = "Sample",
#                   ylab = "Frequency (percent)", xlab="Phase", palette =c("indianred3","green3","royalblue4")) +
#     theme_bw() + facet_wrap(facets = "Cluster", scales="free_y", ncol =4) +
#     theme(axis.text.y = element_text(size=6)) + ggpubr::rotate_x_text(angle = 45)

Fig 2f

# Stacked
p <- ggplot(proportions, aes(fill=Cluster, y=Frequency, x=Sample)) + 
    geom_bar(position="stack", stat="identity")

p + scale_fill_manual(values=my_colors) + theme_bw() + 
    theme(axis.text.y = element_text(size=16), 
          axis.text.x = element_text(size=16, angle = 90, vjust = 0.5, hjust=1), 
          axis.title.x = element_text(size=16), axis.title.y = element_text(size=16), 
          legend.text = element_text(size=12), legend.title = element_blank()) + 
    labs(x = NULL, y = "Sample composition (%)")

Fig. 2h

UMAP colored by SCLC fate

# Define fate color vector and use to plot by cell state
pheno_col <- c("brown2","darkorchid4","dodgerblue","#66A61E","orange","turquoise4","turquoise")
DimPlot(TBO_seurat, group.by='Pheno', cols=pheno_col, reduction='umap', label=FALSE, 
        shuffle=TRUE, label.size=6) & NoAxes()

Fig. 2h (stacked barplot by Pheno)

Idents(TBO_seurat) <-'Pheno'

x <- table(TBO_seurat@meta.data$Genotype,Idents(TBO_seurat))
proportions <- as.data.frame(100*prop.table(x, margin = 1))

colnames(proportions)<-c("Cluster", "Sample", "Frequency")

p <- ggplot(proportions, aes(fill=Sample, y=Frequency, x=Cluster)) + 
    geom_bar(position="stack", stat="identity")

p + scale_fill_manual(values=pheno_col) + theme_bw() + 
    theme(axis.text.y = element_text(size=20), 
          axis.text.x=element_text(size=20, angle = 90, hjust = 1, vjust = 0.5), 
          axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), 
          legend.text = element_text(size=20), legend.title = element_text(size=20)) + 
    labs(x = NULL)

Fig. 2i UMAP

NE score (correlation)

FeaturePlot(TBO_seurat, features = c("NE_spearman"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

Fig 2i (violin plots of NE score by SCLC fate)

plotVlnByPhen("NE_spearman", yl=c(-.7, .8))

Fig 2i (violin plots of NE score by genotype)

x <- scater::plotColData(sce, x = "Genotype", y = "NE_spearman", colour_by = "Genotype") + 
    scale_discrete_manual(aesthetics = c("colour", "fill"), values=c("darkorchid4","orange"))

x <- x + theme(axis.title.y = element_text(size = 24),axis.text.y = element_text(size = 16), 
               axis.text.x = element_blank(), 
               # plot.title = element_blank(),
               legend.position = "none") + labs(x = "",  y = "Signature score") +  ylim(-1,1) + 
    geom_boxplot(fill=c("darkorchid4","orange"), alpha=1/5, 
                 position = position_dodge(width = .2), size=0.2,
                 color="black", notch=TRUE, notchwidth=0.3, outlier.shape = 2, outlier.colour=NA)

## Perform wilcoxon rank-sum test for RPM vs RPMA on the NE score data (Fig. 3i) ##
x + stat_summary(fun = mean,
                 geom = "point",
                 shape = 18,
                 size = 2,
                 color = "red",
                 position = position_dodge(width = 0.9)) + 
    ggpubr::stat_compare_means(method = "wilcox.test",label = "p.signif", 
                               hide.ns = TRUE,comparisons = list(c("RPM", "RPMA")))

Fig. 2j

Fig 2j (UMAP marker gene expression)

Equivalent color schemes:
* viridis::scale_color_viridis(option="rocket", direction=-1)
* scale_color_gradientn(colors=viridis::rocket(10, direction=-1))

# TF target gene scores in UMAP
FeaturePlot(TBO_seurat, features = c("ASCL1_Targets1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("NEUROD1_Targets1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("ATOH1_Targets1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("POU2F3_Targets1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("YAP_Activity1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

VlnPlots by genotype with wilcoxon rank sum tests

Fig. 2j (violin plots)

plotTFscoresVln <- function(score_name, yl=c(-.1, .32)) {
    a <- VlnPlot(TBO_seurat,
                 features = c(score_name),
                 group.by = "Genotype",
                 cols = c("darkorchid4", "orange"),
                 pt.size = 0.01,
                 alpha = 0.05,
                 ncol = 1) + 
        theme(axis.title.y = element_text(size = 20),
              axis.text.x = element_text(angle = 0, hjust = 0.5, size=20),  # rotate x-axis labels
              # plot.title = element_blank(),                                 # remove plot title
              legend.position = "none"                                      # remove legend
        ) + 
        labs(x = "",             # custom x-axis title
             y = "Expression"  # custom y-axis title
        ) + ylim(yl[1], yl[2]) + 
        stat_summary(fun = mean,
                     geom = "point",
                     shape = 18,
                     size = 2,
                     color = "red",
                     position = position_dodge(width = 0.9)) + 
        ggpubr::stat_compare_means(method = "wilcox.test", label = "p.signif", size=8, 
                                   hide.ns = TRUE, comparisons = list(c("RPM", "RPMA")))
        
        # Add mean point and do wilcoxon rank sum test 
    return(a)
}

Fig 2j (insets)

plotTFscoresVln("ATOH1_Targets1")

plotTFscoresVln("ASCL1_Targets1", yl=c(-0.1, 0.8))

plotTFscoresVln("NEUROD1_Targets1")

plotTFscoresVln("POU2F3_Targets1")

plotTFscoresVln("YAP_Activity1", yl=c(-0.1, 1))

Fig. 2k (UMAP by cell type consensus signature)

FeaturePlot(TBO_seurat, features = c("NE_Consensus1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("Basal_Consensus1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("Tuft_Consensus1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

Fig. 2k (violin plots of consensus signatures by SCLC fate)

plotVlnByPhen("NE_Consensus1", yl=c(-0.5, 1.3))

plotVlnByPhen("Tuft_Consensus1", yl=c(-0.25, 1))

plotVlnByPhen("Basal_Consensus1", yl=c(-0.1, 1.1))

Fig 2l (violin plots of SCLC arhcetype scores by phenotype)

plotVlnByPhen("A_Archetype1", yl=c(-0.05, 0.27))

plotVlnByPhen("A2_Archetype1", yl=c(-0.05, 0.2))

plotVlnByPhen("N_Archetype1", yl=c(0, 0.3))

plotVlnByPhen("P_Archetype1", yl=c(0, 0.27))

Ext Data Fig 5c

plotStackedBarByGeno <- function(gene, lab) {
    dat <- FetchData(TBO_seurat, vars = c(gene, "Genotype"))
    dat <- dat %>%
      mutate(Expression_Status = ifelse(dat[,gene] > 0.01, "Positive", "Negative"))
    
    x <- table(dat$Genotype, dat$Expression_Status)
    
    proportions <- as.data.frame(prop.table(x, margin = 1))
    colnames(proportions) <- c("Cluster", "Sample", "Fraction")

    p <- ggplot(proportions, aes(fill = Sample, y = Fraction, x = Cluster)) + 
        geom_bar(position = "stack", stat = "identity")
    p <- p + scale_fill_manual(values=c("blue4","red3"))+ theme_bw() + 
        theme(axis.text.y = element_text(size=20), axis.text.x=element_text(size=20), 
              axis.title.x =element_text(size=20), axis.title.y = element_text(size=20), 
              legend.text = element_text(size=20), legend.title = element_text(size=0), 
              plot.title = element_text(size = 18, face = "plain", hjust = 0.5)) + 
        labs(y = lab, x = NULL)
    return(p)
}
plotStackedBarByGeno("Ascl1", "Fraction of Ascl1-hi cells (>0.01)")

plotStackedBarByGeno("Neurod1", "Fraction of Neurod1-hi cells (>0.01)")

plotStackedBarByGeno("Pou2f3", "Fraction of Pou2f3-hi cells (>0.01)")

Extended Data Fig. 5e

Violin plots of expression of TFs by Leiden cluster

genes <- c("Ascl1","Neurod1","Pou2f3")

# Create violin plots with Kruskal-Wallis test
plots <- lapply(genes, function(gene) {
  p <- VlnPlot(TBO_seurat,
               features = gene,
               group.by = "leiden_scVI_1.2",
               cols = my_colors,
               alpha = 0.5) +  # smaller dots
    ggpubr::stat_compare_means(method = "kruskal.test", 
                       label = "p.format", 
                       label.x = 2,size = 5) +
    ggtitle("") +
    theme(plot.title = element_text(face = "italic"), legend.position = "none",  # Remove legend
          axis.title.x = element_blank(),axis.title.y = element_text(size = 20),
          axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 14)) +  # Remove x-axis label
    labs(y = "Expression")  # Change y-axis label to "Expression"
  return(p)
})

# Arrange plots
patchwork::wrap_plots(plots, ncol = 3)

Ext Data Fig 10

FeaturePlot(TBO_seurat, features = c("Iono_Mouse_Ext1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

FeaturePlot(TBO_seurat, features = c("Iono_Human1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

Ext Data Fig. 10e (violin plots of consensus signatures by SCLC fate)

plotVlnByPhen("Iono_Mouse_Ext1", yl=c(-0.1, 0.45))

plotVlnByPhen("Iono_Human1", yl=c(-0.1, .75))

Fig 5d

Caris SCLC tumor signatures in basal cells

caris_feat <-  c("Caris_A1","Caris_Y1","Caris_N1","Caris_Mixed1","Caris_P1","Caris_TN-1")
library(viridis)
v <- FeaturePlot(TBO_seurat, features = caris_feat, pt.size=0.2, reduction='umap') 
v <- lapply(v, `+`, scale_color_viridis(option="rocket", direction=-1))
lapply(v, `+`, NoAxes())
[[1]]

[[2]]

[[3]]

[[4]]

[[5]]

[[6]]

Fig 5d (violin plots of human tumor SCLC scores by SCLC fate)

plotVlnByPhen("Caris_A1", yl=c(-0.1, 0.2))

plotVlnByPhen("Caris_Y1", yl=c(-0.1, 0.2))

plotVlnByPhen("Caris_N1", yl=c(-0.1, 0.3))

plotVlnByPhen("Caris_Mixed1", yl=c(-0.1, 0.15))

plotVlnByPhen("Caris_P1", yl=c(-0.1, 0.25))

plotVlnByPhen("Caris_TN.1", yl=c(-0.1, 0.25))

Fig. 5e

Gene signature scores correlation heatmap

signature_matrix <- FetchData(TBO_seurat, vars = c("Caris_A1", "George_A1", "Liu_A1","Lissa_A1","ASCL1_Targets1",
                                                   "Caris_N1","George_N1", "Liu_N1","Lissa_N1", "NEUROD1_Targets1",
                                                   "Caris_P1","George_P1", "Liu_P1","POU2F3_Targets1",
                                                   "Caris_Y1","George_Y1", "Lissa_Y1", "YAP_Activity1",
                                                   "T_Cell_Inflamed_Gay1","MHC_Sig_Gay1",
                                                   "NE_Consensus1","Basal_Consensus1","Tuft_Consensus1"))

# Compute Pearson correlation matrix
correlation_matrix <- cor(signature_matrix, method = "pearson")

# Define color scale from -1 to 1
breaks_seq <- seq(-1, 1, length.out = 100)  # Ensure scale covers full correlation range

pheatmap::pheatmap(correlation_matrix, 
         color = scico::scico(100, palette = "berlin"),
         display_numbers = FALSE, 
         breaks = breaks_seq, number_color = "gray80",fontsize_number=6,
         main = "Signature correlation in mouse TBO Allografts", cluster_cols = TRUE, cluster_rows=TRUE)

Fig 5g

Inflammatory signatures in basal cells

# Fig 5g feature plot (MHC_Sig_Gay1="Antigen presentation") #
FeaturePlot(TBO_seurat, features = c("MHC_Sig_Gay1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()


### NMF3-I signature for Fig. 5g
FeaturePlot(TBO_seurat, features = c("NMF3-I1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()


### Gay et al SCLC-I signature for Fig. 5g
FeaturePlot(TBO_seurat, features = c("Gay_SCLC-I1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

Fig 5g (violin plots of inflammation scores by SCLC fate)

plotVlnByPhen("MHC_Sig_Gay1", yl=c(-0.1, 0.8))

plotVlnByPhen("NMF3.I1", yl=c(-0.1, 0.5))

plotVlnByPhen("Gay_SCLC.I1", yl=c(-0.1, 0.3))

Fig 5h (violin plots of therapeutic target gene expression by SCLC fate)

x_labs <- c("A","A/N","N","At","P","SL","B")

v <- VlnPlot(TBO_seurat, features = c("Dll3", "Ncam1", "Sez6", "Tacstd2"), 
        group.by=c("Pheno"), cols=pheno_col,alpha=0.05, ncol=4)
v <- lapply(v,  `+`, labs(x=NULL, y="Expression"))
v <- lapply(v,  `+`, theme(axis.text.x = element_text(angle=90)))
v <- lapply(v,  `+`,  scale_x_discrete(labels=x_labs))

patchwork::wrap_plots(v, ncol=4)

Ext Data Fig 6a

DimPlot(TBO_seurat, group.by='GenoCT', cols=my_colors, shuffle=TRUE, 
        reduction='umap', label=FALSE, label.size=6) & NoAxes() + NoLegend()

DimPlot(TBO_seurat, group.by='GenoCT', cols=my_colors, shuffle=TRUE, 
        reduction='fa', label=FALSE, label.size=6) & NoAxes() + NoLegend()

CellTag analysis in other notebook

---
title: "RPM RPMA Allos notebook"
author: "Abbie Ireland, Darren Tyson"
date: "2025-06-24"
output: html_notebook
---

### Modifying original code

Original file: [Fig3_4_6_ExtFig5-6-10_RPM_RPMA_Allos_CellTag.R](Preprocessing_for_reference/R_Code/Fig3_4_6_ExtFig5-6-10_RPM_RPMA_Allos_CellTag.R)

This notebook is modified from the original code to remove the preprocessing steps and show figures inline within the notebook.

There is a [separate notebook for the CellTag analysis](04_RPM_RPMA_Allos_CellTag_notebook.nb.html).  

### Related to:

-   Fig 2e-l
-   Fig 5d-h
-   Extended Data Fig 5c,e
-   Extended Data Fig 6a
-   Extended Data Fig 10e

```{r}
suppressPackageStartupMessages({
    library(Seurat)
    library(SeuratObject)
    library(SummarizedExperiment)
    library(ggplot2)
    library(ggpubr)
})
```

### Load the Seurat object and CellTag clones

```{r}
TBO_seurat<-readRDS("../data/07_2025_RPM_RPMA_TBO_CellTag_Seurat_wSigs_FA_dpt_final.rds")
TBO_seurat

clones <- readRDS("../data/05_2025_RPM_RPMA_TBOAllo_CellTagClones_Onlyclones.rds")
clones
```

### Load organoid data as SingleCellExperiment

```{r}
sce <- as.SingleCellExperiment(TBO_seurat, assay = "norm")
```

```{r}
phenotypes <- as.character(unique(sce@colData@listData$Pheno))
pheno_pairs_list <- combn(phenotypes, 2, simplify = FALSE)
```

### Define color palettes

```{r}
my_colors <- c(
  "#E41A1C", # strong red
  "#377EB8", # medium blue
  "#4DAF4A", # green
  "#984EA3", # purple
  "#FF7F00", # orange
  "#FFFF33", # yellow
  "#A65628", # brown
  "#e7298a", # pink
  "#666666", # grey
  "#66C2A5", # teal
  "#FC8D62", # salmon
  "#8DA0CB", # soft blue
  "#E78AC3", # soft pink (different from 8)
  "#A6D854", # light green (but yellowish tint, not green)
  "#FFD92F", # lemon yellow
  "#E5C494", # light brown
  "#B3B3B3", # light grey
  "#1B9E77", # deep teal
  "#D95F02", # dark orange
  "#7570B3", # strong purple
  "#66A61E"  # olive green (NOT same green as before)
)

```

#### Define function to plot violin plots by phenotype

Copied from [02_WT_RPM_Organoids_Allografts_notebook.Rmd](02_WT_RPM_Organoids_Allografts_notebook.Rmd)

Needs SingleCellExperiment `sce` object and `pheno_pairs_list` defined in the notebook.

```{r}
plotVlnByPhen <- function(feature, yl=c(-0.1,0.3), focus=NA) {
    if(!is.na(focus)) {
        pheno_pairs_list <- pheno_pairs_list[sapply(pheno_pairs_list, function(x) focus %in% x)]
    }
    
    scater::plotColData(sce, x = "Pheno", y = feature, colour_by = "Pheno") + 
        scale_discrete_manual(aesthetics = c("colour", "fill"), values=pheno_col) +
        theme(axis.title.y = element_text(size = 24), axis.text.y = element_text(size = 16),
              axis.text.x = element_blank(),   # rotate x-axis labels
              plot.title = element_blank(),    # remove plot title
              legend.position = "none"         # remove legend
        ) + labs(x = "",                     # custom x-axis title
                 y = "Signature score"        # custom y-axis title
        ) + ylim(yl[1], yl[2]) + 
        geom_boxplot(fill=pheno_col, alpha=1/5, position = position_dodge(width = .2),
                     size=0.2, color="black", notch=TRUE, notchwidth=0.3, outlier.shape = 2, outlier.colour=NA) # + 
        # stat_summary(fun = mean,
        #              geom = "point",
        #              shape = 18,
        #              size = 2,
        #              color = "red",
        #              position = position_dodge(width = 0.5)) + 
        # ggpubr::stat_compare_means(method = "wilcox.test", label = "p.signif", size=4, 
        #                            vjust = 0.5,
        #                            hide.ns = TRUE, comparisons = pheno_pairs_list)
}
```

### Fig. 2e UMAP

```{r fig.height=6, fig.width=7, message=FALSE, warning=FALSE}
DimPlot(TBO_seurat, group.by='Genotype', cols=c("darkorchid4","orange"),
        pt.size = 0.1,
        reduction='umap', label=FALSE, label.size=7, shuffle=TRUE) & NoAxes()

```

### Fig. 2f (UMAP by Leiden cluster)

```{r fig.height=6, fig.width=6, message=FALSE, warning=FALSE}
DimPlot(TBO_seurat, group.by='leiden_scVI_1.2', 
        cols=my_colors, reduction='umap', label=TRUE, label.size=5,
        pt.size = 0.1) & NoAxes() +
    theme(legend.position="none")

```

```{r fig.height=8, fig.width=10, message=FALSE, warning=FALSE}
Idents(TBO_seurat) <- 'leiden_scVI_1.2'

x <- table(TBO_seurat@meta.data$Genotype,Idents(TBO_seurat))
proportions <- as.data.frame(100*prop.table(x, margin = 1))

colnames(proportions) <- c("Sample", "Cluster", "Frequency")

# ggpubr::ggbarplot(proportions, x="Sample", y="Frequency", fill = "Sample", group = "Sample",
#                   ylab = "Frequency (percent)", xlab="Phase", palette =c("indianred3","green3","royalblue4")) +
#     theme_bw() + facet_wrap(facets = "Cluster", scales="free_y", ncol =4) +
#     theme(axis.text.y = element_text(size=6)) + ggpubr::rotate_x_text(angle = 45)
```

#### Fig 2f

```{r fig.height=3.5, fig.width=3.5, message=FALSE, warning=FALSE}
# Stacked
p <- ggplot(proportions, aes(fill=Cluster, y=Frequency, x=Sample)) + 
    geom_bar(position="stack", stat="identity")

p + scale_fill_manual(values=my_colors) + theme_bw() + 
    theme(axis.text.y = element_text(size=16), 
          axis.text.x = element_text(size=16, angle = 90, vjust = 0.5, hjust=1), 
          axis.title.x = element_text(size=16), axis.title.y = element_text(size=16), 
          legend.text = element_text(size=12), legend.title = element_blank()) + 
    labs(x = NULL, y = "Sample composition (%)")
```

### Fig. 2h

UMAP colored by SCLC fate

```{r fig.height=6, fig.width=7, message=FALSE, warning=FALSE}
# Define fate color vector and use to plot by cell state
pheno_col <- c("brown2","darkorchid4","dodgerblue","#66A61E","orange","turquoise4","turquoise")
DimPlot(TBO_seurat, group.by='Pheno', cols=pheno_col, reduction='umap', label=FALSE, 
        shuffle=TRUE, label.size=6) & NoAxes()

```

#### Fig. 2h (stacked barplot by Pheno)

```{r fig.height=4, fig.width=4, message=FALSE, warning=FALSE}
Idents(TBO_seurat) <-'Pheno'

x <- table(TBO_seurat@meta.data$Genotype,Idents(TBO_seurat))
proportions <- as.data.frame(100*prop.table(x, margin = 1))

colnames(proportions)<-c("Cluster", "Sample", "Frequency")

p <- ggplot(proportions, aes(fill=Sample, y=Frequency, x=Cluster)) + 
    geom_bar(position="stack", stat="identity")

p + scale_fill_manual(values=pheno_col) + theme_bw() + 
    theme(axis.text.y = element_text(size=20), 
          axis.text.x=element_text(size=20, angle = 90, hjust = 1, vjust = 0.5), 
          axis.title.x = element_text(size=20), axis.title.y = element_text(size=20), 
          legend.text = element_text(size=20), legend.title = element_text(size=20)) + 
    labs(x = NULL)
```

#### Fig. 2i UMAP

NE score (correlation)

```{r fig.height=3, fig.width=3.5, message=FALSE, warning=FALSE}
FeaturePlot(TBO_seurat, features = c("NE_spearman"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
```

#### Fig 2i (violin plots of NE score by SCLC fate)

```{r fig.height=4, fig.width=3, message=FALSE, warning=FALSE}
plotVlnByPhen("NE_spearman", yl=c(-.7, .8))
```

#### Fig 2i (violin plots of NE score by genotype)

```{r fig.height=3, fig.width=2, message=FALSE, warning=FALSE}
x <- scater::plotColData(sce, x = "Genotype", y = "NE_spearman", colour_by = "Genotype") + 
    scale_discrete_manual(aesthetics = c("colour", "fill"), values=c("darkorchid4","orange"))

x <- x + theme(axis.title.y = element_text(size = 24),axis.text.y = element_text(size = 16), 
               axis.text.x = element_blank(), 
               # plot.title = element_blank(),
               legend.position = "none") + labs(x = "",  y = "Signature score") +  ylim(-1,1) + 
    geom_boxplot(fill=c("darkorchid4","orange"), alpha=1/5, 
                 position = position_dodge(width = .2), size=0.2,
                 color="black", notch=TRUE, notchwidth=0.3, outlier.shape = 2, outlier.colour=NA)

## Perform wilcoxon rank-sum test for RPM vs RPMA on the NE score data (Fig. 3i) ##
x + stat_summary(fun = mean,
                 geom = "point",
                 shape = 18,
                 size = 2,
                 color = "red",
                 position = position_dodge(width = 0.9)) + 
    ggpubr::stat_compare_means(method = "wilcox.test",label = "p.signif", 
                               hide.ns = TRUE,comparisons = list(c("RPM", "RPMA")))

```

### Fig. 2j

#### Fig 2j (UMAP marker gene expression)

Equivalent color schemes:\
\* `viridis::scale_color_viridis(option="rocket", direction=-1)`\
\* `scale_color_gradientn(colors=viridis::rocket(10, direction=-1))`

```{r fig.height=3, fig.width=4, message=FALSE, warning=FALSE}
# TF target gene scores in UMAP
FeaturePlot(TBO_seurat, features = c("ASCL1_Targets1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("NEUROD1_Targets1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("ATOH1_Targets1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("POU2F3_Targets1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("YAP_Activity1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

```

### VlnPlots by genotype with wilcoxon rank sum tests

#### Fig. 2j (violin plots)

```{r}
plotTFscoresVln <- function(score_name, yl=c(-.1, .32)) {
    a <- VlnPlot(TBO_seurat,
                 features = c(score_name),
                 group.by = "Genotype",
                 cols = c("darkorchid4", "orange"),
                 pt.size = 0.01,
                 alpha = 0.05,
                 ncol = 1) + 
        theme(axis.title.y = element_text(size = 20),
              axis.text.x = element_text(angle = 0, hjust = 0.5, size=20),  # rotate x-axis labels
              # plot.title = element_blank(),                                 # remove plot title
              legend.position = "none"                                      # remove legend
        ) + 
        labs(x = "",             # custom x-axis title
             y = "Expression"  # custom y-axis title
        ) + ylim(yl[1], yl[2]) + 
        stat_summary(fun = mean,
                     geom = "point",
                     shape = 18,
                     size = 2,
                     color = "red",
                     position = position_dodge(width = 0.9)) + 
        ggpubr::stat_compare_means(method = "wilcox.test", label = "p.signif", size=8, 
                                   hide.ns = TRUE, comparisons = list(c("RPM", "RPMA")))
        
        # Add mean point and do wilcoxon rank sum test 
    return(a)
}
```

#### Fig 2j (insets)

```{r fig.height=3, fig.width=3, message=FALSE, warning=FALSE}
plotTFscoresVln("ATOH1_Targets1")
plotTFscoresVln("ASCL1_Targets1", yl=c(-0.1, 0.8))
plotTFscoresVln("NEUROD1_Targets1")
plotTFscoresVln("POU2F3_Targets1")
plotTFscoresVln("YAP_Activity1", yl=c(-0.1, 1))
```

### Fig. 2k (UMAP by cell type consensus signature)

```{r fig.height=3, fig.width=4, message=FALSE, warning=FALSE}
FeaturePlot(TBO_seurat, features = c("NE_Consensus1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("Basal_Consensus1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("Tuft_Consensus1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
```

### Fig. 2k (violin plots of consensus signatures by SCLC fate)

```{r fig.height=4, fig.width=3, message=FALSE, warning=FALSE}
plotVlnByPhen("NE_Consensus1", yl=c(-0.5, 1.3))
plotVlnByPhen("Tuft_Consensus1", yl=c(-0.25, 1))
plotVlnByPhen("Basal_Consensus1", yl=c(-0.1, 1.1))
```

#### Fig 2l (violin plots of SCLC arhcetype scores by phenotype)

```{r fig.height=4, fig.width=3, message=FALSE, warning=FALSE}
plotVlnByPhen("A_Archetype1", yl=c(-0.05, 0.27))
plotVlnByPhen("A2_Archetype1", yl=c(-0.05, 0.2))
plotVlnByPhen("N_Archetype1", yl=c(0, 0.3))
plotVlnByPhen("P_Archetype1", yl=c(0, 0.27))
```

#### Ext Data Fig 5c

```{r}
plotStackedBarByGeno <- function(gene, lab) {
    dat <- FetchData(TBO_seurat, vars = c(gene, "Genotype"))
    dat <- dat %>%
      mutate(Expression_Status = ifelse(dat[,gene] > 0.01, "Positive", "Negative"))
    
    x <- table(dat$Genotype, dat$Expression_Status)
    
    proportions <- as.data.frame(prop.table(x, margin = 1))
    colnames(proportions) <- c("Cluster", "Sample", "Fraction")

    p <- ggplot(proportions, aes(fill = Sample, y = Fraction, x = Cluster)) + 
        geom_bar(position = "stack", stat = "identity")
    p <- p + scale_fill_manual(values=c("blue4","red3"))+ theme_bw() + 
        theme(axis.text.y = element_text(size=20), axis.text.x=element_text(size=20), 
              axis.title.x =element_text(size=20), axis.title.y = element_text(size=20), 
              legend.text = element_text(size=20), legend.title = element_text(size=0), 
              plot.title = element_text(size = 18, face = "plain", hjust = 0.5)) + 
        labs(y = lab, x = NULL)
    return(p)
}
```

```{r fig.height=6, fig.width=6, message=FALSE, warning=FALSE}
plotStackedBarByGeno("Ascl1", "Fraction of Ascl1-hi cells (>0.01)")
plotStackedBarByGeno("Neurod1", "Fraction of Neurod1-hi cells (>0.01)")
plotStackedBarByGeno("Pou2f3", "Fraction of Pou2f3-hi cells (>0.01)")
```

### Extended Data Fig. 5e

Violin plots of expression of TFs by Leiden cluster

```{r fig.height=4, fig.width=10, message=FALSE, warning=FALSE}
genes <- c("Ascl1","Neurod1","Pou2f3")

# Create violin plots with Kruskal-Wallis test
plots <- lapply(genes, function(gene) {
  p <- VlnPlot(TBO_seurat,
               features = gene,
               group.by = "leiden_scVI_1.2",
               cols = my_colors,
               alpha = 0.5) +  # smaller dots
    ggpubr::stat_compare_means(method = "kruskal.test", 
                       label = "p.format", 
                       label.x = 2,size = 5) +
    ggtitle("") +
    theme(plot.title = element_text(face = "italic"), legend.position = "none",  # Remove legend
          axis.title.x = element_blank(),axis.title.y = element_text(size = 20),
          axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 14)) +  # Remove x-axis label
    labs(y = "Expression")  # Change y-axis label to "Expression"
  return(p)
})

# Arrange plots
patchwork::wrap_plots(plots, ncol = 3)
```

#### Ext Data Fig 10

```{r fig.height=3, fig.width=4, message=FALSE, warning=FALSE}
FeaturePlot(TBO_seurat, features = c("Iono_Mouse_Ext1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
FeaturePlot(TBO_seurat, features = c("Iono_Human1"), pt.size=0.2,
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()
```

### Ext Data Fig. 10e (violin plots of consensus signatures by SCLC fate)

```{r fig.height=4, fig.width=3, message=FALSE, warning=FALSE}
plotVlnByPhen("Iono_Mouse_Ext1", yl=c(-0.1, 0.45))
plotVlnByPhen("Iono_Human1", yl=c(-0.1, .75))
```

#### Fig 5d

Caris SCLC tumor signatures in basal cells

```{r fig.height=3, fig.width=4, message=FALSE, warning=FALSE}
caris_feat <-  c("Caris_A1","Caris_Y1","Caris_N1","Caris_Mixed1","Caris_P1","Caris_TN-1")
library(viridis)
v <- FeaturePlot(TBO_seurat, features = caris_feat, pt.size=0.2, reduction='umap') 
v <- lapply(v, `+`, scale_color_viridis(option="rocket", direction=-1))
lapply(v, `+`, NoAxes())
```

#### Fig 5d (violin plots of human tumor SCLC scores by SCLC fate)

```{r fig.height=4, fig.width=3.5, message=FALSE, warning=FALSE}
plotVlnByPhen("Caris_A1", yl=c(-0.1, 0.2))
plotVlnByPhen("Caris_Y1", yl=c(-0.1, 0.2))
plotVlnByPhen("Caris_N1", yl=c(-0.1, 0.3))
plotVlnByPhen("Caris_Mixed1", yl=c(-0.1, 0.15))
plotVlnByPhen("Caris_P1", yl=c(-0.1, 0.25))
plotVlnByPhen("Caris_TN.1", yl=c(-0.1, 0.25))
```

#### Fig. 5e

Gene signature scores correlation heatmap

```{r fig.height=8, fig.width=9}
signature_matrix <- FetchData(TBO_seurat, vars = c("Caris_A1", "George_A1", "Liu_A1","Lissa_A1","ASCL1_Targets1",
                                                   "Caris_N1","George_N1", "Liu_N1","Lissa_N1", "NEUROD1_Targets1",
                                                   "Caris_P1","George_P1", "Liu_P1","POU2F3_Targets1",
                                                   "Caris_Y1","George_Y1", "Lissa_Y1", "YAP_Activity1",
                                                   "T_Cell_Inflamed_Gay1","MHC_Sig_Gay1",
                                                   "NE_Consensus1","Basal_Consensus1","Tuft_Consensus1"))

# Compute Pearson correlation matrix
correlation_matrix <- cor(signature_matrix, method = "pearson")

# Define color scale from -1 to 1
breaks_seq <- seq(-1, 1, length.out = 100)  # Ensure scale covers full correlation range

pheatmap::pheatmap(correlation_matrix, 
         color = scico::scico(100, palette = "berlin"),
         display_numbers = FALSE, 
         breaks = breaks_seq, number_color = "gray80",fontsize_number=6,
         main = "Signature correlation in mouse TBO Allografts", cluster_cols = TRUE, cluster_rows=TRUE)
```

#### Fig 5g

Inflammatory signatures in basal cells

```{r fig.height=3, fig.width=4, message=FALSE, warning=FALSE}
# Fig 5g feature plot (MHC_Sig_Gay1="Antigen presentation") #
FeaturePlot(TBO_seurat, features = c("MHC_Sig_Gay1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

### NMF3-I signature for Fig. 5g
FeaturePlot(TBO_seurat, features = c("NMF3-I1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

### Gay et al SCLC-I signature for Fig. 5g
FeaturePlot(TBO_seurat, features = c("Gay_SCLC-I1"), pt.size=0.2, 
            reduction='umap') + viridis::scale_color_viridis(option="rocket",direction=-1) & NoAxes()

```

#### Fig 5g (violin plots of inflammation scores by SCLC fate)

```{r fig.height=4, fig.width=3.5, message=FALSE, warning=FALSE}
plotVlnByPhen("MHC_Sig_Gay1", yl=c(-0.1, 0.8))
plotVlnByPhen("NMF3.I1", yl=c(-0.1, 0.5))
plotVlnByPhen("Gay_SCLC.I1", yl=c(-0.1, 0.3))
```

#### Fig 5h (violin plots of therapeutic target gene expression by SCLC fate)

```{r fig.height=2.5, fig.width=8}
x_labs <- c("A","A/N","N","At","P","SL","B")

v <- VlnPlot(TBO_seurat, features = c("Dll3", "Ncam1", "Sez6", "Tacstd2"), 
        group.by=c("Pheno"), cols=pheno_col,alpha=0.05, ncol=4)
v <- lapply(v,  `+`, labs(x=NULL, y="Expression"))
v <- lapply(v,  `+`, theme(axis.text.x = element_text(angle=90)))
v <- lapply(v,  `+`,  scale_x_discrete(labels=x_labs))

patchwork::wrap_plots(v, ncol=4)
```

#### Ext Data Fig 6a

```{r fig.height=4, fig.width=5}
DimPlot(TBO_seurat, group.by='GenoCT', cols=my_colors, shuffle=TRUE, 
        reduction='umap', label=FALSE, label.size=6) & NoAxes() + NoLegend()
DimPlot(TBO_seurat, group.by='GenoCT', cols=my_colors, shuffle=TRUE, 
        reduction='fa', label=FALSE, label.size=6) & NoAxes() + NoLegend()
```

### CellTag analysis in other notebook
